# Import necessary libraries (standard library first, then third-party)
import warnings

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium.plugins import MarkerCluster
from matplotlib.colors import LinearSegmentedColormap

# Default size applied to every matplotlib figure created below
plt.rcParams['figure.figsize'] = (10, 6)

# Hide FutureWarning messages so they do not clutter cell output
warnings.filterwarnings('ignore', category=FutureWarning)

# Apply the seaborn "whitegrid" look to all plots
sns.set_theme(style="whitegrid")

# Read the incident records into the DataFrame used by every later cell
df = pd.read_csv("data/security_incidents_cleaned.csv")

# Show the first few rows (rendered as the cell's output in a notebook)
print("Preview of the dataset:")
df.head()
Preview of the dataset:
| | year | country | un | ingo | icrc | nrcs_and_ifrc | nngo | other | nationals_killed | nationals_wounded | ... | location | latitude | longitude | motive | actor_type | actor_name | details | verified | source | high_impact |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1997 | Cambodia | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | ... | Unknown | 14.070929 | 103.099916 | Unknown | Unknown | Unknown | 1 ICRC national staff killed while working in ... | Archived | Archived | False |
| 1 | 1997 | Rwanda | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | ... | Office/compound | -1.499840 | 29.634970 | Unknown | Unknown | Unknown | 3 INGO international (Spanish) staff killed, 1... | Archived | Archived | False |
| 2 | 1997 | Tajikistan | 4 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | ... | Unknown | 38.628173 | 70.815654 | NaN | Unknown | Unknown | 3 UN national staff, 1 UN international (Niger... | Archived | Archived | False |
| 3 | 1997 | Somalia | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | Unknown | -0.358216 | 42.545087 | Political | Non-state armed group: Regional | Al-Itihaad al-Islamiya | 1 INGO international staff killed by Al ittiha... | Archived | Archived | False |
| 4 | 1997 | Rwanda | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | Unknown | -1.950851 | 30.061508 | Political | Unknown | Unknown | 1 UN national staff shot and killed in Kigali ... | Archived | Archived | False |

5 rows × 35 columns
Code
# Import necessary libraries for interactive plotting
from pathlib import Path

import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

# Make sure the output directory exists before any write_html call
Path("images").mkdir(parents=True, exist_ok=True)

# Group data by year and country to prepare for animation
incidents_by_year_country = (
    df.groupby(['year', 'country']).size().reset_index(name='incidents')
)

# Total incidents per year, used for the per-frame annotation text
year_totals = incidents_by_year_country.groupby('year')['incidents'].sum().reset_index()
year_totals = year_totals.sort_values('year')

# Create animated choropleth map showing incidents by country over time.
# range_color is fixed to the global maximum so colours are comparable
# across frames instead of being rescaled for every year.
fig = px.choropleth(
    incidents_by_year_country,
    locations='country',
    locationmode='country names',
    color='incidents',
    animation_frame='year',
    color_continuous_scale='Viridis',
    range_color=[0, incidents_by_year_country['incidents'].max()],
    title='Security Incidents by Country Over Time',
    labels={'incidents': 'Number of Incidents'},
    height=600,
)

# Improve layout
fig.update_layout(
    coloraxis_colorbar=dict(title='Number of Incidents'),
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection_type='natural earth',
    ),
)

# Slow the animation down: the first button of the first update menu is the
# one whose frame/transition durations are adjusted here.
play_args = fig.layout.updatemenus[0].buttons[0].args[1]
play_args['frame']['duration'] = 1000
play_args['transition']['duration'] = 500

# Add annotation for total incidents per year.
# Frames are matched by name rather than by "year - first year" offset: the
# offset only lines up when every year in the range has at least one
# incident, otherwise later annotations land on the wrong frame (or the
# index runs past the end of fig.frames).
totals_by_frame = {
    str(year): total
    for year, total in zip(year_totals['year'], year_totals['incidents'])
}
for frame in fig.frames:
    frame_label = str(frame.name)
    if frame_label not in totals_by_frame:
        continue
    frame.layout.annotations = [
        dict(
            x=0.5,
            y=1.05,
            xref='paper',
            yref='paper',
            text=f'Total Incidents in {frame_label}: {totals_by_frame[frame_label]}',
            showarrow=False,
            font=dict(size=16),
        )
    ]

# For Quarto output, save as HTML
fig.write_html("images/interactive_incidents_over_time.html")

# Display for notebook viewing
fig.show()

# Create an alternative interactive bar chart with year slider
year_incidents = df.groupby('year').size().reset_index(name='incidents')
# Convert year to string for better display
year_incidents['year'] = year_incidents['year'].astype(str)

fig2 = px.bar(
    year_incidents,
    x='year',
    y='incidents',
    title='Interactive Security Incidents by Year',
    labels={'incidents': 'Number of Incidents', 'year': 'Year'},
    height=500,
)

# Add range slider
fig2.update_layout(
    xaxis=dict(
        rangeslider=dict(visible=True),
        type='category',  # Use category type for discrete years
    ),
    bargap=0.1,
    template='plotly_white',
)

# Save the interactive bar chart
fig2.write_html("images/interactive_yearly_incidents_barchart.html")

# Show the bar chart
fig2.show()

print("Interactive visualizations saved as HTML files in the images directory.")
Interactive visualizations saved as HTML files in the images directory.
Code
from pathlib import Path

# Filter data for Palestine, Ukraine, and South Sudan
focus_countries = ['Occupied Palestinian Territories', 'Ukraine', 'South Sudan']
focus_df = df[df['country'].isin(focus_countries)]

# Check if we have data for these countries
countries_found = focus_df['country'].unique()
print(f"Found data for: {', '.join(countries_found)}")
print(f"Total incidents: {len(focus_df)}")
if focus_df.empty:
    # Everything below needs a year range and at least one subplot, so stop
    # with a clear message instead of failing later inside matplotlib.
    raise ValueError(f"No incidents found for any of: {', '.join(focus_countries)}")

# Make sure the output directory exists before any savefig call
Path('images').mkdir(parents=True, exist_ok=True)

# Group data by year and country
incidents_by_year_country = (
    focus_df.groupby(['year', 'country']).size().reset_index(name='incidents')
)

# Get available years range (plain ints so range() works for any year dtype)
min_year = int(focus_df['year'].min())
max_year = int(focus_df['year'].max())
year_ticks = range(min_year, max_year + 1)

# Define distinct colors / markers for each country
colors = {'Palestine': 'green', 'Ukraine': 'blue', 'South Sudan': 'red'}
markers = {'Palestine': 'o', 'Ukraine': 's', 'South Sudan': '^'}

# The style and event tables use short names, while the dataset spells the
# country as 'Occupied Palestinian Territories'. Without this alias the
# lookups miss: that series gets no explicit colour or marker and none of
# its key events are annotated.
country_aliases = {'Occupied Palestinian Territories': 'Palestine'}

# Create line plot with markers
plt.figure(figsize=(14, 7))
for country in countries_found:
    style_key = country_aliases.get(country, country)
    country_data = incidents_by_year_country[incidents_by_year_country['country'] == country]
    plt.plot(
        country_data['year'],
        country_data['incidents'],
        marker=markers.get(style_key, 'o'),
        linewidth=2.5,
        color=colors.get(style_key),
        label=country,
    )

plt.title('Security Incidents in Palestine, Ukraine, and South Sudan', fontsize=14)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Number of Incidents', fontsize=12)
plt.grid(True, alpha=0.3)
plt.legend(fontsize=12)

# Set x-axis to show all years
plt.xticks(year_ticks, rotation=45)

# Key events to annotate, as (year, label) pairs per country
key_events = {
    'Ukraine': [
        (2014, 'Crimea annexation'),
        (2022, 'Full-scale invasion'),
    ],
    'South Sudan': [
        (2011, 'Independence'),
        (2013, 'Civil war begins'),
        (2018, 'Peace agreement'),
    ],
    'Palestine': [
        (2008, '2008-09 Gaza War'),
        (2014, '2014 Gaza War'),
        (2021, 'May 2021 conflict'),
        (2023, 'Oct 2023 conflict'),
    ],
}

# Vertical offset of the annotation text: 10% of the plotted data range.
# It does not depend on the country or event, so compute it once.
y_range = (
    incidents_by_year_country['incidents'].max()
    - incidents_by_year_country['incidents'].min()
)
offset = y_range * 0.1

# Add annotations if the years are in our data
for country in countries_found:
    style_key = country_aliases.get(country, country)
    for year, event in key_events.get(style_key, []):
        # Only annotate when this country has incidents in the event year
        year_data = incidents_by_year_country[
            (incidents_by_year_country['country'] == country)
            & (incidents_by_year_country['year'] == year)
        ]
        if year_data.empty:
            continue
        incidents_value = year_data['incidents'].values[0]
        plt.annotate(
            f'{event}',
            xy=(year, incidents_value),
            xytext=(year, incidents_value + offset),
            arrowprops=dict(
                facecolor=colors.get(style_key),
                shrink=0.05,
                width=1,
                headwidth=5,
            ),
            fontsize=9,
            color=colors.get(style_key),
        )

plt.tight_layout()
plt.savefig('images/three_countries_incidents_comparison.png', dpi=300)
plt.show()

# Add a line chart showing casualties for these countries over time
if 'total_affected' in focus_df.columns:
    casualties_by_year = (
        focus_df.groupby(['year', 'country'])['total_affected'].sum().reset_index()
    )

    plt.figure(figsize=(14, 7))
    for country in countries_found:
        style_key = country_aliases.get(country, country)
        country_data = casualties_by_year[casualties_by_year['country'] == country]
        plt.plot(
            country_data['year'],
            country_data['total_affected'],
            marker=markers.get(style_key, 'o'),
            linewidth=2.5,
            color=colors.get(style_key),
            label=country,
        )

    plt.title('Casualties from Security Incidents in Palestine, Ukraine, and South Sudan', fontsize=14)
    plt.xlabel('Year', fontsize=12)
    plt.ylabel('Total Casualties', fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.legend(fontsize=12)
    plt.xticks(year_ticks, rotation=45)
    plt.tight_layout()
    plt.savefig('images/three_countries_casualties_comparison.png', dpi=300)
    plt.show()

# Create a stacked bar chart to show the composition of casualties
casualty_columns = ['total_killed', 'total_wounded', 'total_kidnapped']
if all(col in focus_df.columns for col in casualty_columns):
    # Group by year and country, summing up different types of casualties
    casualty_types = (
        focus_df.groupby(['year', 'country'])[casualty_columns].sum().reset_index()
    )

    # Create subplots, one for each country
    fig, axes = plt.subplots(
        len(countries_found), 1,
        figsize=(14, 4 * len(countries_found)),
        sharex=True,
    )
    # plt.subplots returns a bare Axes (not an array) for a single row
    axes_seq = axes if len(countries_found) > 1 else [axes]

    # pandas draws bars at positions 0..n-1 and the subplots share one
    # x-axis, so every country must use the same year index; otherwise the
    # same bar position means a different year in each subplot.
    all_years = pd.Index(year_ticks, name='year')
    last_row = len(countries_found) - 1

    for i, (ax, country) in enumerate(zip(axes_seq, countries_found)):
        country_data = casualty_types[casualty_types['country'] == country]

        # One row per year (missing years filled with 0) for the stacked bars
        country_data_stacked = (
            country_data.set_index('year')[casualty_columns]
            .reindex(all_years, fill_value=0)
        )

        # Plot stacked bar
        country_data_stacked.plot(
            kind='bar',
            stacked=True,
            ax=ax,
            color=['darkred', 'orange', 'purple'],
        )

        # Set title and labels; only the bottom subplot carries the x label
        ax.set_title(f'{country}: Casualties by Type', fontsize=12)
        ax.set_ylabel('Number of Casualties', fontsize=10)
        ax.set_xlabel('Year' if i == last_row else '', fontsize=10)

    plt.tight_layout()
    plt.savefig('images/three_countries_casualty_types.png', dpi=300)
    plt.show()
Found data for: South Sudan, Occupied Palestinian Territories, Ukraine
Total incidents: 785
Code
# Count incidents per year, ordered chronologically
incidents_by_year = df['year'].value_counts().sort_index()

# Print the number of incidents for each year
print("Number of incidents by year:")
for yr, n_incidents in incidents_by_year.items():
    print(f"{yr}: {n_incidents} incidents")

# Create a bar chart to visualize the trend
plt.figure(figsize=(12, 6))
years_axis = incidents_by_year.index
counts_axis = incidents_by_year.values
sns.barplot(x=years_axis, y=counts_axis, palette='viridis')
plt.xlabel('Year')
plt.ylabel('Number of Incidents')
plt.title('Number of Security Incidents by Year')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# Get the total number of incidents
total_incidents = len(df)
print(f"\nTotal number of incidents in the dataset: {total_incidents}")

# Calculate year range (inclusive of both the first and the last year)
year_column = df['year']
min_year = year_column.min()
max_year = year_column.max()
n_years = max_year - min_year + 1
print(f"Dataset covers incidents from {min_year} to {max_year} ({n_years} years)")
Total number of incidents in the dataset: 4314
Dataset covers incidents from 1997 to 2025 (29 years)
Code
from html import escape
from pathlib import Path


def _severity_color(affected):
    """Return the marker colour for a total-affected count.

    Missing/NaN or 0 -> 'blue', 1-5 -> 'green', 6-20 -> 'orange',
    anything larger -> 'red'.
    """
    if pd.isna(affected) or affected == 0:
        return 'blue'
    if affected <= 5:
        return 'green'
    if affected <= 20:
        return 'orange'
    return 'red'


def _display(value):
    """Return *value* as HTML-safe text, or 'Unknown' when it is missing/NaN."""
    if value is None or pd.isna(value):
        return 'Unknown'
    # Values come straight from the dataset; escape them so characters such
    # as '<' or '&' cannot break the popup markup.
    return escape(str(value))


def _popup_html(row):
    """Build the popup HTML for one incident row."""
    # row.get() keeps the map usable when an optional column is absent
    fields = (
        ('Country', row.get('country')),
        ('Year', row.get('year')),
        ('Total Affected', row.get('total_affected')),
        ('Attack Type', row.get('means_of_attack')),
    )
    return ''.join(
        f"<b>{label}:</b> {_display(value)}<br>" for label, value in fields
    )


# Create an interactive map of all incidents
def create_incidents_map(data):
    """Return a folium map with one clustered circle marker per located incident.

    Args:
        data: DataFrame with 'latitude' and 'longitude' columns. The columns
            'country', 'year', 'total_affected' and 'means_of_attack' are
            shown in the popup when present; 'total_affected' also selects
            the marker colour.

    Returns:
        A ``folium.Map`` centred on the mean coordinate of the plotted
        incidents, or on (0, 0) when no row has both coordinates.
    """
    # Only rows with both coordinates can be placed on the map
    valid_coords = data[data['latitude'].notna() & data['longitude'].notna()]

    # Centre on the plotted points; a NaN centre (no usable rows) is not a
    # valid map location, so fall back to the origin in that case.
    if valid_coords.empty:
        center = [0, 0]
    else:
        center = [valid_coords['latitude'].mean(), valid_coords['longitude'].mean()]

    incidents_map = folium.Map(location=center, zoom_start=2)

    # Add a marker cluster for better performance with many points
    marker_cluster = MarkerCluster().add_to(incidents_map)

    for _, row in valid_coords.iterrows():
        # Same colour for outline and fill, so compute it once per row
        marker_color = _severity_color(row.get('total_affected'))
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            popup=folium.Popup(_popup_html(row), max_width=300),
            fill=True,
            fill_opacity=0.7,
            color=marker_color,
            fill_color=marker_color,
        ).add_to(marker_cluster)

    return incidents_map


# Create the map
global_incidents_map = create_incidents_map(df)

# Save the map as HTML file (creating the output directory if needed)
map_filename = "images/global_security_incidents_map.html"
Path(map_filename).parent.mkdir(parents=True, exist_ok=True)
global_incidents_map.save(map_filename)

# Display in notebook (if running in Jupyter)
global_incidents_map
Make this Notebook Trusted to load map: File -> Trust Notebook
Code
from pathlib import Path

# Filter data for incidents from 2015-2025.
# The bounds are named once so the filter, the messages and the output file
# name cannot drift apart.
start_year, end_year = 2015, 2025
recent_df = df[df['year'].between(start_year, end_year)]  # inclusive on both ends
print(f"Number of incidents from {start_year}-{end_year}: {len(recent_df)}")


# Create an interactive map of recent incidents (2015-2025)
def create_recent_incidents_map(data):
    """Return a clustered folium map of the incidents in *data*.

    This used to be a line-for-line copy of ``create_incidents_map`` (defined
    in the previous cell); it now delegates to it so the two maps cannot
    diverge. The name is kept for existing callers.

    Args:
        data: DataFrame of incidents, already filtered to the wanted years.

    Returns:
        The ``folium.Map`` built by ``create_incidents_map``.
    """
    return create_incidents_map(data)


# Create the map for recent incidents
recent_incidents_map = create_recent_incidents_map(recent_df)

# Save the map as HTML file (creating the output directory if needed)
map_filename = f"images/recent_security_incidents_map_{start_year}_{end_year}.html"
Path(map_filename).parent.mkdir(parents=True, exist_ok=True)
recent_incidents_map.save(map_filename)
print(f"Interactive map of recent incidents ({start_year}-{end_year}) saved as {map_filename}")

# Display in notebook (if running in Jupyter)
recent_incidents_map
Number of incidents from 2015-2025: 2461
Interactive map of recent incidents (2015-2025) saved as images/recent_security_incidents_map_2015_2025.html
Make this Notebook Trusted to load map: File -> Trust Notebook